# -*- coding: utf-8 -*-
import jieba
from collections import Counter

# Default settings for the word-frequency report.
INPUT_PATH = 'temp.txt'   # pre-processed text of "Dream of the Red Chamber"
TOP_N = 30                # how many of the most frequent words to list
MIN_WORD_LENGTH = 2       # tokens shorter than this are discarded


def filter_words(words, min_length=MIN_WORD_LENGTH):
    """Return the tokens of *words* that have at least *min_length* characters.

    With the default of 2 this drops every single-character token and keeps
    only multi-character words. Order and duplicates are preserved.
    """
    return [word for word in words if len(word) >= min_length]


def top_words(words, top_n=TOP_N, min_length=MIN_WORD_LENGTH):
    """Return the *top_n* most frequent tokens as ``(word, count)`` pairs.

    Tokens shorter than *min_length* are ignored. The result is ordered from
    the highest count to the lowest, as produced by ``Counter.most_common``.
    """
    return Counter(filter_words(words, min_length)).most_common(top_n)


def format_report(ranked):
    """Build the report lines for *ranked*, an iterable of ``(word, count)``.

    The first two lines are the title and a separator; each following line
    holds a 1-based rank, the word and its count.
    """
    lines = ["《红楼梦》高频词组列表（按词频从高到低）：", "=" * 50]
    lines.extend(
        f"{i:2d}. {word:<15} {count:>4}次"
        for i, (word, count) in enumerate(ranked, 1)
    )
    return lines


def main(path=INPUT_PATH, top_n=TOP_N, min_length=MIN_WORD_LENGTH):
    """Read *path*, segment it with jieba and print the word-frequency report.

    Raises whatever ``open`` raises (e.g. ``FileNotFoundError``) when *path*
    cannot be read.
    """
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    # Segment the whole text into a list of tokens.
    words = jieba.lcut(text)

    for line in format_report(top_words(words, top_n, min_length)):
        print(line)


# Run the report only when executed as a script, so that importing this
# module (e.g. to reuse or test the helpers) does not read any file.
if __name__ == "__main__":
    main()